/*------------------------------------------------------------------------------
 
Project: Mathisen, R.M., Schakel, W., Hense, S., Elsässer, L., Persson, M. and
	Pontusson, J. (forthcoming). 'Unequal Responsiveness and Government
	Partisanship in Northwest Europe,' in Lupu, N. and Pontusson, J. (eds)
	Unequal Democracies: Public Policy, Responsiveness, and Redistribution in an
	Era of Rising Economic Inequality. Cambridge: Cambridge University Press.

Note: This do-file constructs the data set used for all analysis and figures
	("comp_resp_data_pooled"), by combining data from the Netherlands, Germany,
	Norway and Sweden. It is not necessary to run this do-file to replicate the
	chapter, because this do-file constructs the data file that is provided with
	the supplementary material ("mshepp_data"). However, the file is still
	included with the replication materials for the sake of transparency, as it
	shows how the various sources were transformed into the data file used in
	the analyses.

Data: comp_resp_data_nl (Netherlands, downloaded in June 2021 from
	dx.doi.org/10.1093/ser/mwz018)

	comp_resp_data_de (Germany, not publicly available) 

	comp_resp_data_no (Norway, not publicly available)

	comp_resp_data_se (Sweden, not publicly available)

	Comparative Political Data Set, 1960-2019 (downloaded in November 2021 from
	https://www.cpds-data.org)

Last updated: 2 May 2023

Sections:
	1. Netherlands
	2. Germany
	3. Norway
	4. Sweden
	5. Partisanship indicators
	6. Pooling the data

------------------------------------------------------------------------------*/

* Folder holding the input files read below (the four comp_resp_data_*.dta
* country files and the CPDS file). Replace the placeholder before running.
* Paths are built as "$data_orig/<file>", so do not add a trailing slash.
global data_orig "YOUR/PATH/HERE"

* Folder in which the pooled file mshepp_data.dta is saved at the end of this
* do-file. Replace the placeholder before running (again, no trailing slash).
global mshepp_data "YOUR/PATH/HERE"

/*------------------------------------------------------------------------------
 1. Netherlands
------------------------------------------------------------------------------*/

* Load the Dutch source file
use "$data_orig/comp_resp_data_nl.dta", clear

* Variables that are not carried into the pooled file. The inc1_favor-inc20_other
* entry is a positional range: it removes every variable stored between those
* two names in the data set.
local unused_nl id_issue id_obs inc1_favor-inc20_other lss surveyorg
drop `unused_nl'

* Each country is parked in a temporary file; the temporary files are appended
* and merged in section 6 below
tempfile data_nl
save `data_nl'

/*------------------------------------------------------------------------------
 2. Germany
------------------------------------------------------------------------------*/

* Load the German source file
use "$data_orig/comp_resp_data_de.dta", clear

* Shift the dimension codes up by one (0 -> 1, 1 -> 2) so that they fall in the
* 1/2 range that section 6 labels "Economic" / "Non-economic". Any other value
* is left unchanged.
recode dimension (0 = 1) (1 = 2)

* Use the same name for the question wording as the final data set
* (questiontext is formatted and labelled in section 6)
rename question questiontext

* Variables that are not carried into the pooled file. edulow-bus is a
* positional range (every variable stored between those two names).
* NOTE(review): the original list named `field' twice; the redundant second
* mention is removed here. If the second one was meant to be a different
* variable, add that variable to the list.
drop id_obs *org pop field edulow-bus coal direction gov_party left_share

* Park the German data in a temporary file for pooling in section 6
tempfile data_de
save `data_de'

/*------------------------------------------------------------------------------
 3. Norway
------------------------------------------------------------------------------*/

* Load the Norwegian source file
use "$data_orig/comp_resp_data_no.dta", clear

* policyarea is turned into integer codes by -encode- (codes follow the sorted
* order of the distinct strings), after which codes 1 and 2 are swapped to
* obtain the dimension variable.
* NOTE(review): which policyarea string ends up as 1 vs. 2 cannot be seen from
* this file -- confirm against the Norwegian data.
encode policyarea, generate(dimtemp)
recode dimtemp (1 = 2) (2 = 1), generate(dimension)

* Variables that are not carried into the pooled file, including the helper
* dimtemp and the original policyarea. inc1_favor-inc12_other is a positional
* range.
local unused_no inc1_favor-inc12_other id_issue id_obs surveyorg* policyarea dimtemp
drop `unused_no'

* Park the Norwegian data in a temporary file for pooling in section 6
tempfile data_no
save `data_no'

/*------------------------------------------------------------------------------
 4. Sweden
------------------------------------------------------------------------------*/

* Load the Swedish source file
use "$data_orig/comp_resp_data_se.dta", clear

* Replace the existing p10/p50/p90 by the np-prefixed versions: the old
* variables are removed first so that the names are free, then every variable
* matching np*0 loses its leading "n".
drop p10 p50 p90
rename np*0 p*0

* All Swedish observations get survey type 1
generate surveytype = 1

* Dimension: policyarea 0 becomes 2 and system-missing becomes 3; every other
* value is copied as it is (code 3 is folded into 2 in section 6)
recode policyarea (0 = 2) (. = 3), generate(dimension)

* Retain only the variables needed for pooling (id_c is an abbreviated
* variable name and q* keeps every variable whose name starts with q)
local keep_se id_c p10 p50 p90 polchange* year surveytype switcher q* dimension
keep `keep_se'

* Park the Swedish data in a temporary file for pooling in section 6
tempfile data_se
save `data_se'

/*------------------------------------------------------------------------------
 5. Partisanship indicators
------------------------------------------------------------------------------*/

* Load the Comparative Political Data Set
use "$data_orig/CPDS_1960-2019_Update_2021.dta", clear

* Keep the four countries under study and give them the country identifier
* used in the survey data: CPDS codes 13, 24, 26 and 33 become 1, 2, 3 and 4
* (labelled Germany, Netherlands, Norway and Sweden in section 6)
keep if inlist(countryn, 13, 24, 26, 33)
recode countryn (13 = 1) (24 = 2) (26 = 3) (33 = 4), gen(id_country)

* Declare the country-year panel so that the lead operators (F., F2., ...) work
tsset id_country year

* Left cabinet share for different time periods (relative to the survey year):
* the mean of gov_left1 over t..t+2 and over t..t+4, rescaled from percentages
* to a 0-1 share and rounded to two decimals. The result is missing whenever
* one of the leads is missing (e.g. at the end of the panel).
gen leftgov2 = round((gov_left1 + F.gov_left1 + F2.gov_left1) / 300, .01)
gen leftgov4 = round((gov_left1 + F.gov_left1 + F2.gov_left1 +				///
	F3.gov_left1 + F4.gov_left1) / 500, .01)

* Categorical partisanship variable (left prime minister), hand-coded by
* country and year. Codes 1 and 2 are collapsed into "Left PM" and codes 3 and
* 4 into "Non-left PM" below; what separates the codes within each pair is not
* documented in this file. Country-years that match none of the rules keep a
* missing value.
* The full variable name id_country is used throughout (instead of the
* abbreviation id_c) so that the rules do not depend on variable-name
* abbreviation being enabled or unambiguous.
gen leftcat = .

* Country 1
replace leftcat = 1 if id_country == 1 & year > 1998 & year < 2006
replace leftcat = 3 if id_country == 1 & inlist(year,2006,2007,2008,2009,	///
	2014,2015,2016,2017,2018)
replace leftcat = 4 if id_country == 1 & inlist(year,1998,2010,2011,2012,2013)

* Country 2
replace leftcat = 2 if id_country == 2 & year > 1994 & year < 2003
replace leftcat = 3 if id_country == 2 & inlist(year,1982,1990,1991,1992,	///
	1993,1994,2007,2008,2009,2010,2013,2014,2015,2016)
replace leftcat = 4 if id_country == 2 & inlist(year,1979,1980,1981,1983,	///
	1984,1985,1986,1987,1988,1989,2003,2004,2005,2006,2011,2012)

* Country 3: code 1 is the default for every year, overridden for listed years
replace leftcat = 1 if id_country == 3
replace leftcat = 2 if id_country == 3 & inlist(year,2006,2007,2008,2009,	///
	2010,2011,2012,2013)
replace leftcat = 4 if id_country == 3 & inlist(year,1966,1967,1968,1969,	///
	1970,1973,1982,1983,1984,1985,1990,1998,1999,2002,2003,2004,2005,2014,	///
	2015,2016)

* Country 4: code 1 is the default for every year, overridden for listed years
replace leftcat = 1 if id_country == 4
replace leftcat = 4 if id_country == 4 & inlist(year,1976,1977,1978,1979,	///
	1980,1981,1982,1991,1992,1993,1994,2006,2007,2008,2009,2010,2011,2012,	///
	2013,2014)

recode leftcat (1 2 = 1) (3 4 = 0) // here we recode it into a dummy
label define leftcat 1 "Left PM" 0 "Non-left PM", replace
label values leftcat leftcat

* Years to exclude in analyses using the categorical partisanship variable:
* excl is 1 when leftcat differs from its value one or two years ahead.
* NOTE(review): a non-missing leftcat compared with a missing lead also counts
* as "different", so the last two panel years are flagged -- confirm intended.
gen excl = (leftcat != F.leftcat | leftcat != F2.leftcat)

* Only the merge keys and the partisanship indicators are needed in section 6
keep year id_country leftgov* leftcat excl

tempfile cpds
save `cpds'

/*------------------------------------------------------------------------------
 6. Pooling the data
------------------------------------------------------------------------------*/

* Stack the four temporary country files: the Netherlands is the base, and
* Germany, Norway and Sweden are appended in that order
use `data_nl', clear
foreach part in data_de data_no data_se {
	append using ``part''
}

* Attach the partisanship indicators by country-year. Survey observations
* without a CPDS match are kept; CPDS country-years without any survey
* observation are discarded, and no _merge variable is left behind.
merge m:1 id_country year using `cpds', keep(master match) nogenerate

* Value label for the country identifier
label define clab 1 "Germany" 2 "Netherlands" 3 "Norway" 4 "Sweden"			///
	5 "United States", replace
label values id_country clab

* A system-missing switcher is treated as 0
replace switcher = 0 if switcher == .

* Questions involving constitutional changes (const equal to 1) are removed,
* after which the flag itself is no longer needed
keep if const != 1
drop const

* Keep an observation only if at least one of the three preference measures
* and at least one of the two policy-change outcomes is not system-missing
keep if (p10 != . | p50 != . | p90 != .) & (polchange2 != . | polchange4 != .)

* Renaming identifiers: strip the "id_" prefix from every id_* variable, then
* shorten the country identifier to c and add c2, a string copy built from the
* value label attached to the country identifier
rename id_* *
rename country c
decode c, gen(c2)
order c2, after(c)

* Dimensions: fold code 3 into code 2, leaving a two-category variable, and
* label it. The label is attached under the name it is defined with (d2lab);
* the original attached "dlab", a name not defined anywhere in this do-file,
* so the Economic / Non-economic labels defined here were never applied.
recode dimension (3 = 2), gen(dim)
label define d2lab 1 "Economic" 2 "Non-economic", replace
label values dim d2lab
drop dimension

* Time period dummy (1 for survey years after 1997), placed right after year
generate post97 = (year > 1997)
order post97, after(year)

* Difference measures: signed gaps between the percentile groups first ...
foreach pair in 9010 9050 5010 {
	local hi = substr("`pair'", 1, 2)
	local lo = substr("`pair'", 3, 2)
	generate diff`pair' = p`hi' - p`lo'
}

* ... followed by their absolute values (kept in a separate loop so that the
* variables are created in the order diff* first, diff*_abs second)
foreach pair in 9010 9050 5010 {
	local hi = substr("`pair'", 1, 2)
	local lo = substr("`pair'", 3, 2)
	generate diff`pair'_abs = abs(p`hi' - p`lo')
}

* Coalition indicators: two groups are close to each other (absolute gap below
* .08) while both are more than .1 away from the third group.
* NOTE(review): Stata treats a missing value as larger than any number, so a
* missing absolute difference passes the "> .1" tests. Observations with only
* some of p10/p50/p90 missing are not dropped earlier -- confirm this is
* intended.
generate coal9050 = (diff9050_abs < .08 & diff9010_abs > .1 & diff5010_abs > .1)
generate coal5010 = (diff5010_abs < .08 & diff9010_abs > .1 & diff9050_abs > .1)

* Weights (note that we have different weights to ensure that, even when using
* subsets of the data, countries are weighted equally).
* Each weight equals (observations in the subset / 4) divided by the number of
* observations the country contributes to that subset, rounded to three
* decimals. Country code 5 is left out of all counts and receives no weight.

* Overall country weight
bysort c: generate weight = _N
count if c != 5
local n_pooled = r(N)
replace weight = round((`n_pooled' / 4) / weight, .001)
replace weight = . if c == 5

* Temporary 0/1 indicators that define the subsets used in the loop below
tabulate dim, generate(d2) // d21 and d22, one dummy per dimension
generate d9010 = (diff9010_abs > .1 & c != 5)
generate d9050 = (diff9050_abs > .1 & c != 5)

* One weight per subset. A subset consists of the observations for which the
* indicator equals 1, except for excl, where the observations that are NOT
* excluded (excl equal to 0) form the subset. Selecting on excl == 0 directly
* replaces the original approach of inverting excl in place before the loop
* and inverting it back afterwards; the counts are held in local macros rather
* than in global scalars. The resulting weights are unchanged.
quietly foreach flag of varlist excl coal9050 coal5010 d21 d22 d9010 d9050 {
	local keepval = cond("`flag'" == "excl", 0, 1)

	* Size of the subset across the four countries
	count if `flag' == `keepval' & c != 5
	local n_subset = r(N)

	* Size of the subset within each country, stored on all of its rows
	generate weight_`flag' = .
	forvalues ctry = 1/4 {
		count if c == `ctry' & `flag' == `keepval'
		replace weight_`flag' = r(N) if c == `ctry'
	}

	* A country without observations in the subset ends up with a missing weight
	replace weight_`flag' = round((`n_subset' / 4) / weight_`flag', .001)
}

* Coalition weight: taken from whichever coalition indicator applies
generate weight_coal = weight_coal9050 if coal9050 == 1
replace weight_coal = weight_coal5010 if coal5010 == 1

* Dimension weight: taken from the weight of the observation's own dimension
generate weight_dim = weight_d21 if dim == 1
replace weight_dim = weight_d22 if dim == 2

* Remove the temporary indicators and the component weights
drop d9* d21 d22 weight_coal9050 weight_coal5010 weight_d21-weight_d22

* Final preparation: move the partisanship indicators, the weights and the
* question variable to the end, left-align the question text, sort by country
* and survey year
order leftgov* leftcat excl weight* q, last
format %-130s questiontext
sort c year

* Variable labels: identifiers and survey characteristics
label variable c "Country (numeric)"
label variable c2 "Country (string)"
label variable year "Survey year"
label variable post97 "Survey conducted after 1997 (dummy)"
label variable surveytype "Survey type (public/commercial)"
label variable switcher "Preferences/policy switched from survey"
label variable questiontext "Survey question text"
label variable dim "Political dimension (economic/non-economic)"

* Variable labels: preferences by income percentile
foreach pct in 10 50 90 {
	label variable p`pct' "Share in favor of policy change, `pct'th income percentile"
}

* Variable labels: policy outcomes
label variable polchange2 "Policy change after two years"
label variable polchange4 "Policy change after four years"

* Variable labels: signed and absolute preference differences
foreach pair in 9010 9050 5010 {
	local hi = substr("`pair'", 1, 2)
	local lo = substr("`pair'", 3, 2)
	label variable diff`pair' "Preference difference, P`hi' - P`lo'"
	label variable diff`pair'_abs "Absolute preference difference, P`hi' - P`lo'"
}

* Variable labels: coalition indicators
label variable coal9050 "|P90-P50|<.08, |P90-P10|>.1, |P50-P10|>.1"
label variable coal5010 "|P50-P10|<.08, |P90-P50|>.1, |P90-P10|>.1"

* Variable labels: partisanship indicators
foreach span in 2 4 {
	label variable leftgov`span' "Left government cabinet share (t - t+`span')"
}
label variable leftcat "Left government, dichotomous (t - t+2)"
label variable excl "Government changed in t+1 or t+2"

* Variable labels: weights
label variable weight "Country weight"
label variable weight_excl "Country weight when excluding observations"
foreach pair in 9010 9050 {
	label variable weight_d`pair' "Country weight when diff`pair'_abs > .1"
}
label variable weight_coal "Country weight for coalition analysis"
label variable weight_dim "Country weight for dimension analysis"

* Write the pooled data set, overwriting an existing copy
save "$mshepp_data/mshepp_data.dta", replace
